package com.shusiedu.controller;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

/**
 * Reads "hot word" entries out of a legacy binary Word ({@code .doc}) file using Apache POI HWPF.
 *
 * <p>An entry is a heading paragraph (the word) paired with a run of consecutive body paragraphs
 * (its explanation). The two kinds of paragraph are told apart by the font size of the
 * paragraph's first character run.
 */
public class WordReading {

    /** Value of {@code CharacterRun.getFontSize()} that marks a word (heading) paragraph. */
    private static final int WORD_FONT_SIZE = 26;

    /** Index of the section that is scanned (sections 0 and 1 were noted as cover and contents). */
    private static final int CONTENT_SECTION_INDEX = 2;

    /** Index, inside the scanned section, of the first paragraph that is examined. */
    private static final int FIRST_PARAGRAPH_INDEX = 2;

    /** Upper bound on the number of word/explanation pairs collected in one run. */
    private static final int MAX_ENTRIES = 24;

    /**
     * Command-line entry point. It currently performs no work; call
     * {@link #readOnWord(String)} with the path of a {@code .doc} file to scan a document.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared for compatibility with existing callers; not thrown at present
     */
    public static void main(String[] args) throws IOException {
        // Intentionally empty: the earlier experiments that lived here were fully commented out.
    }

    /**
     * Opens the given {@code .doc} file and walks the paragraphs of its content section,
     * pairing each word paragraph with its explanation paragraphs.
     *
     * <p>Failures are reported on standard output rather than thrown: a path that is
     * {@code null} or does not end in {@code .doc}, a file that cannot be opened, an I/O error
     * while loading, or a document with too few sections.
     *
     * @param filePath path of the Word document to read
     */
    public static void readOnWord(String filePath) {
        if (filePath == null || !filePath.endsWith(".doc")) {
            System.out.println("文件格式 error:not .doc");
            return;
        }

        // try-with-resources guarantees the stream is closed on every path, and a failed
        // open no longer falls through to the document loader with a null stream.
        try (InputStream is = new FileInputStream(filePath)) {
            HWPFDocument doc = new HWPFDocument(is);
            Range text = doc.getRange(); // the whole document

            // Document structure: text -> section -> paragraph -> character run.
            if (text.numSections() <= CONTENT_SECTION_INDEX) {
                System.out.println("document has no section " + CONTENT_SECTION_INDEX
                        + "; nothing to read");
                return;
            }
            extractEntries(text.getSection(CONTENT_SECTION_INDEX));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            System.out.println("文件打开失败。");
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("IO错误。");
        }
    }

    /**
     * Scans the paragraphs of {@code section}, starting at {@link #FIRST_PARAGRAPH_INDEX}, and
     * hands every completed word/explanation pair to {@link #handleEntry(String, String)}.
     * Scanning stops after {@link #MAX_ENTRIES} pairs or at the last paragraph, whichever
     * comes first.
     */
    private static void extractEntries(Range section) {
        final int paragraphCount = section.numParagraphs();

        String word = "";
        StringBuilder explaining = new StringBuilder();
        boolean wordReady = false;
        boolean explainReady = false;

        int stored = 0;
        int index = FIRST_PARAGRAPH_INDEX;

        while (stored < MAX_ENTRIES && index < paragraphCount) {
            Range para = section.getParagraph(index);
            int fontSize = firstRunFontSize(para);

            if (fontSize == WORD_FONT_SIZE) {
                word = para.text();
                wordReady = true;
                index++;
            } else if (fontSize < WORD_FONT_SIZE) {
                // Swallow the whole run of consecutive explanation paragraphs, stopping at
                // the next heading or at the end of the section.
                do {
                    explaining.append(para.text());
                    index++;
                    if (index >= paragraphCount) {
                        break;
                    }
                    para = section.getParagraph(index);
                    fontSize = firstRunFontSize(para);
                } while (fontSize < WORD_FONT_SIZE);
                explainReady = true;
            } else {
                // Larger than a heading: neither a word nor an explanation. Skip it so the
                // scan always makes progress.
                index++;
            }

            // A pair is complete once both halves have been seen; emit it and start over.
            if (wordReady && explainReady) {
                handleEntry(word, explaining.toString());
                stored++;
                wordReady = false;
                explainReady = false;
                word = "";
                explaining.setLength(0);
            }
        }
    }

    /**
     * Returns the font size of the first character run of {@code para}, or {@code 0} when the
     * paragraph reports no character runs (which makes it count as explanation text).
     */
    private static int firstRunFontSize(Range para) {
        if (para.numCharacterRuns() == 0) {
            return 0;
        }
        CharacterRun first = para.getCharacterRun(0);
        return first.getFontSize();
    }

    /**
     * Receives one completed word/explanation pair. Deliberately a no-op: the database call
     * that used to consume the pair ({@code MysqlDaoImp.addData(word, explaining)}) was
     * disabled in this file, and this hook marks where it would be restored.
     */
    private static void handleEntry(String word, String explaining) {
        // Persistence is disabled; nothing to do.
    }
}
